# Plot 1 goal: show how many professors there are of each gender at
# University X and how much, on average, each group is paid.

# Plot 1 data: one row per level of `sex`, with that group's mean salary stored
# in the column `mean`.
data1 <- summarize(group_by(data, sex), mean = mean(salary))
# Option 1: Faceted histogram. I like this one because it gives a sense of the
# numbers (there are not enough female professors) and also of how much less
# they are paid, but it is definitely not for a public audience.
plot1a <- ggplot(data = data, mapping = aes(x = salary)) +
  geom_histogram(
    mapping = aes(fill = sex),
    color = "white",
    alpha = 0.7
  ) +
  # One panel per gender; the legend is dropped because the panels label it.
  facet_wrap(facets = ~sex) +
  scale_fill_OkabeIto() +
  scale_x_continuous(labels = scales::dollar) +
  labs(
    title = "The Gender Gap in Academia:",
    subtitle = "The Distribution of Faculty Salaries by Gender at University X \n",
    x = " ",
    y = "Number of Professors \n"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )
plot1a

# Option 2: Overlaid histograms. I like this version better because it is more
# concise and the distributions are easy to compare even though they sit on top
# of each other, which leaves room for annotations. This one might be better
# for a public audience.
plot1b <- ggplot(data, aes(salary)) +
  geom_histogram(aes(fill = sex), alpha = 0.6) +
  # Vertical line at each group's mean salary, read from data1.
  geom_vline(data = data1, aes(xintercept = mean, color = sex), size = 2) +
  # Text layers take `fontface`, not `face`; `face` is an element_text()
  # argument and is ignored (with a warning) by annotate().
  # NOTE(review): the dollar amounts and hex colours are hard-coded; they are
  # presumably meant to match the means in data1 and the Okabe-Ito colour of
  # each group -- re-check them whenever the data or level order changes.
  annotate(
    "text",
    label = "Men's Average Salary: $115,090",
    x = 200000, y = 35,
    color = "#E69F00", fontface = "bold", size = 8
  ) +
  annotate(
    "text",
    label = "Women's Average Salary: $101,002",
    x = 200000, y = 32,
    color = "#56B4E9", fontface = "bold", size = 8
  ) +
  labs(
    title = "The Gender Gap in Academia:",
    subtitle = "The Distribution of Faculty Salaries by Gender at University X \n",
    x = " ",
    y = "Number of Professors \n"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  ) +
  scale_fill_OkabeIto() +
  scale_color_OkabeIto() +
  scale_x_continuous(labels = scales::dollar)
plot1b

# Option 3: Boxplot. I like that this shows a summary of each distribution, but
# I don't like that you can't see the actual data points or get a real sense
# of the discrepancy in the number of female and male professors. It is also
# less understandable for the public.
plot1c <- ggplot(
  data = data,
  mapping = aes(x = sex, y = salary, fill = sex)
) +
  geom_boxplot(alpha = 0.7, size = 1) +
  scale_fill_OkabeIto() +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    title = "The Gender Gap in Academia:",
    subtitle = "The Distribution of Faculty Salaries by Gender at University X \n",
    x = " ",
    y = " "
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5),
    legend.position = "none"
  )
plot1c

# Option 4: I like this boxplot better because the actual data points are
# overlaid on the boxplot, but it is more appropriate for a scientific
# audience.
plot1d <- ggplot(data, aes(x = sex, y = salary)) +
  # The jitter layer below already draws every observation, so the boxplot's
  # own outlier markers are hidden; otherwise each outlier would appear twice
  # (once un-jittered from the boxplot, once jittered).
  geom_boxplot(size = 1, outlier.shape = NA) +
  geom_jitter(width = 0.25, size = 3, alpha = 0.3, aes(color = sex)) +
  labs(
    title = "The Gender Gap in Academia:",
    subtitle = "The Distribution of Faculty Salaries by Gender at University X \n",
    y = " ",
    x = " "
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  scale_color_OkabeIto() +
  scale_y_continuous(labels = scales::dollar)
plot1d

# Final version / Option 5: jittered raw salaries per gender, with a dashed
# segment at each group's mean (from data1) and the mean printed above.
plot1e <- ggplot(data, aes(x = sex, y = salary, color = sex)) +
  geom_jitter(width = 0.25, alpha = 0.5, size = 3) +
  # Dashed mean markers. The x ranges are hard-coded around discrete positions
  # 1 and 2, i.e. they assume "Male" is the first level of `sex` and "Female"
  # the second.
  geom_segment(
    data = filter(data1, sex == "Male"),
    aes(x = 0.75, xend = 1.25, y = mean, yend = mean),
    linetype = "dashed", size = 1, color = "black"
  ) +
  geom_segment(
    data = filter(data1, sex == "Female"),
    aes(x = 1.75, xend = 2.25, y = mean, yend = mean),
    linetype = "dashed", size = 1, color = "black"
  ) +
  # Text layers take `fontface`, not `face`; `face` was silently ignored, so
  # the labels were never bold.
  # NOTE(review): the amounts are hard-coded and presumably mirror data1 --
  # re-check them if the data changes.
  annotate(
    "text",
    label = "Average: $115,090",
    x = 1, y = 250000,
    color = "#E69F00", fontface = "bold", size = 8
  ) +
  annotate(
    "text",
    label = "Average: $101,002",
    x = 2, y = 250000,
    color = "#56B4E9", fontface = "bold", size = 8
  ) +
  scale_color_OkabeIto() +
  # A single labs() call: the earlier labs(y = "Salary \n", x = " ") was fully
  # overridden by this one and has been removed.
  labs(
    title = "The Gender Gap in Academia:",
    subtitle = "Faculty Salaries by Gender at University X \n",
    caption = "Data Source: https://cran.r-project.org/web/packages/car/car.pdf",
    x = "\n",
    y = "\n"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::dollar, limits = c(0, 250000))
plot1e

# Plot 2 goal: show how many female and male professors there are at
# University X, broken down by rank.

# Option 1: A simple stacked bar graph. As we discussed in class, stacked bars
# are hard to read.
plot2a <- ggplot(data = data, mapping = aes(x = sex, fill = rank)) +
  geom_bar(alpha = 0.7) +
  labs(
    title = "University X Doesn't Hire Enough Female Professors",
    subtitle = "And has a disproportionate number of men in the most powerful positions...\n",
    x = " ",
    y = "Number of Professors \n"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  # Legend labels are positional: they replace the rank levels in order.
  scale_fill_OkabeIto(
    labels = c("Assistant Professor", "Associate Professor", "Full Professor"),
    name = "Rank"
  )
plot2a

# Print the data ggplot computed for the bars (counts per sex and rank).
ggplot_build(plot2a)[["data"]]
## [[1]]
## fill y count prop x PANEL group ymin ymax xmin xmax colour size
## 1 #009E73 248 248 1 1 1 3 0 248 0.55 1.45 NA 0.5
## 2 #56B4E9 302 54 1 1 1 2 248 302 0.55 1.45 NA 0.5
## 3 #E69F00 358 56 1 1 1 1 302 358 0.55 1.45 NA 0.5
## 4 #009E73 18 18 1 2 1 6 0 18 1.55 2.45 NA 0.5
## 5 #56B4E9 28 10 1 2 1 5 18 28 1.55 2.45 NA 0.5
## 6 #E69F00 39 11 1 2 1 4 28 39 1.55 2.45 NA 0.5
## linetype alpha
## 1 1 0.7
## 2 1 0.7
## 3 1 0.7
## 4 1 0.7
## 5 1 0.7
## 6 1 0.7
# Option 2: Dodged bar graph with flipped coordinates.
# data2 is a copy of data whose `sex` factor is re-levelled to Female, Male.
data2 <- data
data2[["sex"]] <- factor(data2[["sex"]], levels = c("Female", "Male"))

plot2b <- ggplot(data = data2, mapping = aes(x = sex, fill = rank)) +
  geom_bar(alpha = 0.7, position = "dodge") +
  coord_flip() +
  # Legend labels are positional: they replace the rank levels in order.
  scale_fill_OkabeIto(
    labels = c("Assistant Professor", "Associate Professor", "Full Professor"),
    name = "Rank"
  ) +
  labs(
    title = "University X Doesn't Hire Enough Female Professors",
    subtitle = "And has a disproportionate number of men in the most powerful positions...\n",
    x = " ",
    y = "\n Number of Professors"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  )
plot2b

# Final version / Option 3: two waffle charts (one square per professor), one
# for each gender, arranged side by side with a shared legend.

# Number of professors for every sex x rank combination.
data_summary2 <- data2 %>%
  group_by(sex, rank) %>%
  summarize(n = n())

# Display label for each raw rank code, in legend order.
waffle_rank_labels <- c(
  AsstProf = "Assistant Professor",
  AssocProf = "Associate Professor",
  Prof = "Full Professor"
)

# Build the named count vector waffle() expects for one level of `sex`,
# reading the counts from data_summary2 instead of typing them in by hand.
# (The previous hard-coded male "Full Professor" count was 247, which
# disagreed with the 248 reported by ggplot_build(plot2a) in this script.)
waffle_parts <- function(sex_level) {
  rows <- data_summary2[data_summary2$sex == sex_level, ]
  counts <- setNames(rows$n, as.character(rows$rank))
  counts <- counts[names(waffle_rank_labels)]
  # Fail loudly if a rank is missing or the rank codes are not the expected ones.
  stopifnot(!anyNA(counts))
  setNames(counts, unname(waffle_rank_labels))
}

partsf <- waffle_parts("Female")
plot2cw <- waffle(
  partsf,
  rows = 2,
  title = "\n",
  size = 3,
  colors = c("#E69F00", "#56B4E9", "#009E73")
)
plot2cw

partsm <- waffle_parts("Male")
plot2cm <- waffle(
  partsm,
  rows = 17,
  title = " ",
  size = 3,
  colors = c("#E69F00", "#56B4E9", "#009E73")
)
plot2cm

figure <- ggarrange(
  plot2cm, plot2cw,
  labels = c("Male Professors", "Female Professors"),
  common.legend = TRUE, legend = "bottom",
  ncol = 2, nrow = 1,
  font.label = list(size = 25, color = "Black")
)
figure

# Plot 3 goal: display the discrepancy in salaries between men and women at
# the different professor ranks (assistant, associate, and full professor).

# data3 is data with the rank codes replaced by readable labels.
data3 <- mutate(
  data,
  rank = fct_recode(
    rank,
    "Assistant Professor" = "AsstProf",
    "Associate Professor" = "AssocProf",
    "Full Professor" = "Prof"
  )
)
# Option 1: Bar graph of mean salary per gender within each rank, with 95%
# error bars (mean +/- 1.96 * SE). Apparently dynamite plots are bad, though!
plot3a <- data3 %>%
  group_by(sex, rank) %>%
  summarize(
    # Missing salaries are handled the same way in all three pieces: dropped
    # from the mean, from the variance, and from the sample size used for the
    # standard error (previously length() still counted them).
    mean = mean(salary, na.rm = TRUE),
    salary_se = sqrt(var(salary, na.rm = TRUE) / sum(!is.na(salary)))
  ) %>%
  ggplot(aes(x = sex, y = mean, fill = sex)) +
  geom_col(alpha = 0.8) +
  geom_errorbar(
    aes(ymin = (mean - 1.96 * salary_se), ymax = (mean + 1.96 * salary_se)),
    color = "gray40",
    width = 0.2,
    size = 0.5
  ) +
  facet_wrap(~rank) +
  labs(
    title = "University X Pays Female Professors Less At Every Level",
    subtitle = "Mean Salary by Faculty Rank and gender...\n",
    y = " ",
    x = " "
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  scale_fill_OkabeIto() +
  # Namespace-qualified like the Plot 1 code, so this does not depend on the
  # scales package being attached.
  scale_y_continuous(labels = scales::dollar, limits = c(0, 140000))
plot3a

# Option 2: Same summary as the bar version, but drawn as points with 95%
# error bars (mean +/- 1.96 * SE).
plot3b <- data3 %>%
  group_by(sex, rank) %>%
  summarize(
    # Missing salaries are handled the same way in all three pieces: dropped
    # from the mean, from the variance, and from the sample size used for the
    # standard error (previously length() still counted them).
    mean = mean(salary, na.rm = TRUE),
    salary_se = sqrt(var(salary, na.rm = TRUE) / sum(!is.na(salary)))
  ) %>%
  ggplot(aes(x = sex, y = mean, color = sex)) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(ymin = (mean - 1.96 * salary_se), ymax = (mean + 1.96 * salary_se)),
    color = "gray40",
    width = 0.2,
    size = 0.5
  ) +
  facet_wrap(~rank) +
  labs(
    title = "University X Doesn't Pay Female Professors as Much as Male Professors",
    subtitle = "Mean Salary by Faculty Rank...\n",
    y = "\n",
    x = " "
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  scale_color_OkabeIto() +
  # Namespace-qualified like the Plot 1 code, so this does not depend on the
  # scales package being attached.
  scale_y_continuous(labels = scales::dollar, limits = c(0, 140000))
plot3b

# Option 3: This is a slightly different take on the same idea, and it also
# adds a new element. It looks at women's salaries as a proportion of men's
# salaries within each professor rank, but ALSO adds another piece of
# information: the discipline they are working in (either theoretical or
# applied). This highlights that the problem is really in the theoretical
# disciplines.

# Replace the discipline codes A / B with readable labels.
data3 <- mutate(
  data3,
  discipline = fct_recode(
    discipline,
    "Theoretical" = "A",
    "Applied" = "B"
  )
)
# Women's mean salary as a proportion of men's mean salary, for every
# rank x discipline combination (one row each, columns: rank, discipline, prop).
pay_ratio3c <- data3 %>%
  group_by(sex, rank, discipline) %>%
  summarize(mean = mean(salary)) %>%
  ungroup() %>%
  spread(sex, mean) %>%
  # Same quantity as 1 - (Male - Female) / Male, written directly.
  mutate(prop = Female / Male) %>%
  select(rank, discipline, prop)

# Horizontal bars of the pay ratio per rank, one panel per discipline, with a
# dashed reference line at 100% (equal pay).
plot3c <- ggplot(pay_ratio3c, aes(x = rank, y = prop, fill = rank)) +
  geom_col(alpha = 0.8) +
  geom_segment(
    aes(x = 0, xend = 4, y = 1, yend = 1),
    linetype = "dashed", size = 2, color = "black"
  ) +
  labs(
    title = "Large Pay Gap in Theoretical Departments at University X",
    subtitle = "Women's Pay Expressed as Percentage of Men's Pay\n",
    y = "\n Percent of Men's Pay \n",
    x = " ",
    caption = "Data Source: https://cran.r-project.org/web/packages/car/car.pdf"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "bold", hjust = 0.5)
  ) +
  scale_fill_OkabeIto() +
  facet_grid(discipline ~ .) +
  scale_y_continuous(
    breaks = c(0.25, 0.5, 0.75, 1.0),
    labels = c("25%", "50%", "75%", "100%"),
    limits = c(0, 1.25)
  ) +
  coord_flip()

# Percentage labels printed past the end of each bar (at prop = 1.10). They
# are computed from pay_ratio3c rather than typed in by hand, so they cannot
# drift away from the bars they describe.
ann_text <- data.frame(
  rank = pay_ratio3c$rank,
  prop = 1.10,
  discipline = pay_ratio3c$discipline,
  label = sprintf("%.1f%%", 100 * pay_ratio3c$prop)
)

# `label` is mapped inside aes() so every label stays attached to its own row
# (and therefore its own panel) instead of relying on row order.
plot3c + geom_text(data = ann_text, aes(label = label), size = 8)

# Plot 4 goal: display the salaries of men and women by years of service and
# by years since Ph.D.

# Plot 4a: salary against years of service, one panel per discipline, with a
# linear fit (no confidence band) per gender.
plot4a <- ggplot(data3, aes(x = yrs.service, y = salary, color = sex)) +
  geom_point(alpha = 0.6, size = 1.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  facet_wrap(~discipline) +
  labs(
    title = "Gender Pay Gap at University X As a Function of Years of Service",
    subtitle = "Displayed Separately for Theoretical and Applied Departments",
    y = "\n",
    x = "\nYears of Service\n",
    caption = "Data Source: https://cran.r-project.org/web/packages/car/car.pdf"
  ) +
  scale_color_OkabeIto(name = "Gender") +
  # Namespace-qualified like the Plot 1 code, so this does not depend on the
  # scales package being attached.
  scale_y_continuous(labels = scales::dollar, limits = c(0, 250000))
plot4a

# Plot 4b: salary against years since Ph.D., one panel per discipline, with a
# linear fit (no confidence band) per gender.
plot4b <- ggplot(data3, aes(x = yrs.since.phd, y = salary, color = sex)) +
  geom_point(alpha = 0.6, size = 1.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  facet_wrap(~discipline) +
  labs(
    title = "Gender Pay Gap at University X As a Function of Years Since Ph.D.",
    subtitle = "Displayed Separately for Theoretical and Applied Departments",
    y = "\n",
    x = "\nYears Since Ph.D.\n",
    caption = "Data Source: https://cran.r-project.org/web/packages/car/car.pdf"
  ) +
  scale_color_OkabeIto(name = "Gender") +
  # Namespace-qualified like the Plot 1 code, so this does not depend on the
  # scales package being attached.
  scale_y_continuous(labels = scales::dollar, limits = c(0, 250000))
plot4b
